"""
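Word cloud for ch-1, generated directly from the segmented text rather than
from a precomputed word-frequency table.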
"""


import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt


# Chapter text to visualise. Relative paths are resolved against the current
# working directory, not against this file's location.
novel_path = '../../notebook/ch-1.txt'
# Font file handed to WordCloud. This is a Windows-specific location; on any
# other system it has to be pointed at a locally installed font.
# NOTE(review): presumably chosen because it contains CJK glyphs -- confirm.
font_path = r'C:\WINDOWS\FONTS\DENGL.TTF'
# Stop-word list; it is read one entry per line.
stopwords_path = '../../notebook/stop-words.txt'


"""分词"""
# Load the stop words into a set: the filter below tests every token for
# membership, which is O(1) on a set but a linear scan on a list.
with open(stopwords_path, 'r', encoding='utf-8') as f:
    stopwords = {line.strip() for line in f}

# Read the whole chapter into memory.
with open(novel_path, 'r', encoding='utf-8') as f:
    ch1 = f.read()

# Segment with jieba, dropping single-character tokens and stop words.
# The cheap length test runs first so short tokens skip the lookup.
words = [word for word in jieba.cut(ch1)
         if len(word) > 1 and word not in stopwords]
# Re-join the kept tokens with spaces; the word cloud is generated from this
# space-separated string.
space_sentence = ' '.join(words)
# Preview the first 500 characters of the segmented text.
print(space_sentence[:500])

"""创建词云图"""
# Collect the rendering options first, then hand them to WordCloud in one go.
# NOTE(review): per the wordcloud docs, mode='RGBA' together with
# background_color=None should give a transparent background -- confirm.
cloud_options = {
    'font_path': font_path,
    'width': 800,
    'height': 600,
    'mode': 'RGBA',
    'background_color': None,
}
wc = WordCloud(**cloud_options)
# Lay out the cloud from the space-separated tokens.
wc.generate(space_sentence)
# Write the rendered image to the current working directory.
wc.to_file('ch-1-wc.png')

"""通过matplotlib显示"""
# Draw the cloud on the current axes, hide the axis decorations, then open
# the figure window.
axes = plt.gca()
axes.imshow(wc)
axes.axis('off')
plt.show()